MYDB=$DB_SCOPE
GEO_YEAR=2017
DATE_REF=2013-01-01

MIN_DAY=2008-01-01
NUM_DAYS=3834

TMP1_FILE=z_tmp1.csv
TMP2_FILE=z_tmp2.csv

INPUT_FOLDER=../data/raw_data/geography
OUTPUT_FOLDER=../data/data_csv
mkdir -p $OUTPUT_FOLDER
mkdir -p $OUTPUT_FOLDER/covariates
mkdir -p $OUTPUT_FOLDER/timeLocationAggregations/location/CantonId
mkdir -p $OUTPUT_FOLDER/timeLocationAggregations/time/day
mkdir -p $OUTPUT_FOLDER/timeLocationAggregations/time/week
mkdir -p $OUTPUT_FOLDER/SpatialMatrices

######### MAKE TABLE OF TIME ##########
### Get all the dates
python3 listDates.py $MIN_DAY $NUM_DAYS $TMP1_FILE

# Convert into date fields
python3 makeDateTimeFields.py $TMP1_FILE $TMP2_FILE 1 '%Y-%m-%d' $DATE_REF

# Merge the two
importCSVFile $MYDB $TMP1_FILE tmp1
importCSVFile $MYDB $TMP2_FILE tmp2
equalJoin $MYDB 1 tmp3 tmp1 tmp2 rowId

# Retain only relevant columns
sqlite3 $MYDB "create table mydays as \
    select Date, yearMonthDay, yearMonth, year, \
        cast(month as int) as month, cast(day as int) as dayOfMonth, \
        cast (weekNum as int) as weekNum, cast(dayNum as int) as dayNum, \
        cast(dayOfWeek as int) as dayOfWeek \
    from tmp3;"

# Clean up
dropTables $MYDB tmp1 tmp2 tmp3
rm $TMP1_FILE $TMP2_FILE

### Make table mymonths
sqlite3 $MYDB "create table mymonths as select distinct yearMonth, year, month from mydays order by yearMonth;"

### Make table of weeks, which assigns each week to a month and to a year, based on the number of days of that week that fall
### within the month / year
# Make week2month
calcAggregate $MYDB 1 z1 mydays COUNT weekNum yearMonth
keepSingleRow4Group $MYDB 1 z2 z1 MAX mycount weekNum
selectColumns $MYDB 1 week2month z2 weekNum yearMonth
dropTables $MYDB z1 z2

# Put the two together
sqlite3 $MYDB "create table myweeks as \
    select a.weekNum, a.yearMonth, b.year, b.month \
    from week2month a, mymonths b \
    where a.yearMonth = b.yearMonth;"

# Clean up
dropTables $MYDB week2month mymonths


######### MAKE TABLE OF LOCATIONS (COMMUNES) ##########
importCSVFile $MYDB $INPUT_FOLDER/communes2_2017.csv all_communes
sqlite3 $MYDB "create table current_communes as select * from all_communes where ACTUAL = 1;"
sqlite3 $MYDB "create table mycommunes as \
    select CodeInsee, CantonId, DEP, RegionId \
    from current_communes \
    where CodeInsee != '55039' and CodeInsee != '55050' \
        and  CodeInsee != '55139' and  CodeInsee != '55189' and  CodeInsee != '55239' and  CodeInsee != '55307' \
        and DEP not like '97%';"


########## DETERMINE THE SCOPE ##########
sqlite3 $MYDB "create table Scope_CantonId as    select distinct CantonId, DEP, RegionId   from mycommunes order by CantonId;"
sqlite3 $MYDB "create index if not exists index_Scope_CantonId  ON Scope_CantonId  (CantonId);"

### Time:
MIN_DAY=2013-01-01
MAX_DAY=2017-12-31
sqlite3 $MYDB "create table Scope_day as \
    select distinct dayNum, weekNum, yearMonth, dayOfWeek, month, year \
    from mydays where date >= '$MIN_DAY' and date <= '$MAX_DAY';"
sqlite3 $MYDB "create table Scope_week as \
    select distinct a.weekNum, a.yearMonth, a.year, a.month \
    from myweeks a, mydays b \
    where a.weekNum = b.weekNum and b.date >= '$MIN_DAY' and b.date <= '$MAX_DAY' \
    order by a.weekNum;"
sqlite3 $MYDB "create index if not exists index_Scope_day   ON Scope_day   (dayNum);"
sqlite3 $MYDB "create index if not exists index_Scope_week  ON Scope_week  (weekNum);"

### Location-Time:
for TIME_LEVEL in day week; do
    if [ $TIME_LEVEL == "day" ]; then TIME_ID=dayNum; fi
    if [ $TIME_LEVEL == "week" ]; then TIME_ID=weekNum; fi
    cartesianProduct $MYDB 1 Scope_CantonId_${TIME_LEVEL} Scope_CantonId Scope_${TIME_LEVEL}
    sqlite3 $MYDB "create index if not exists index_Scope_CantonId_${TIME_LEVEL} \
        ON Scope_CantonId_${TIME_LEVEL} (CantonId, $TIME_ID);"
done



########## OUTPUT GEOGRAPHIC SCOPES ##########
exportCSVFile $MYDB Scope_CantonId $OUTPUT_FOLDER/covariates/Scope_CantonId.csv

########## OUTPUT TIME SCOPES ##########
for TIME_LEVEL in day week; do
    exportCSVFile $MYDB Scope_${TIME_LEVEL} $OUTPUT_FOLDER/covariates/Scope_${TIME_LEVEL}.csv
done

########## OUTPUT PERIOD LABELS ##########
sqlite3 $MYDB "create table dayLabels as \
    select a.dayNum as dayNum, b.date as periodLabel \
    from Scope_day a, mydays b \
    where a.dayNum = b.dayNum; "

sqlite3 $MYDB "create table weekLabels as \
        select a.weekNum as weekNum, min(b.date) || ' to ' || max(b.date) as periodLabel \
        from Scope_week a, mydays b \
        where a.weekNum = b.weekNum \
        group by a.weekNum \
        order by weekNum; "

# Remove data for 2018
sqlite3 $MYDB "delete from dayLabels where dayNum >= 1826;"
sqlite3 $MYDB "delete from weekLabels where weekNum >= 260;"

exportCSVFile $MYDB weekLabels $OUTPUT_FOLDER/covariates/periodLabels_week.csv
exportCSVFile $MYDB dayLabels $OUTPUT_FOLDER/covariates/periodLabels_day.csv



###### COPY STUFF
cp -r $INPUT_FOLDER/centroids.csv $OUTPUT_FOLDER/centroids.csv



#########################################################################################################
##### MAKE DUMMIES FOR AGGREGATE LEVELS

##### Add quarter to time tables
# Add quarter to mydays
sqlite3 $MYDB "create table mydays2 as select *, '' as quarter from mydays;"
sqlite3 $MYDB "update mydays2 set quarter = cast(year as text) || '1' where month <= 3;"
sqlite3 $MYDB "update mydays2 set quarter = cast(year as text) || '2' where month > 3 and month <= 6;"
sqlite3 $MYDB "update mydays2 set quarter = cast(year as text) || '3' where month > 6 and month <= 9;"
sqlite3 $MYDB "update mydays2 set quarter = cast(year as text) || '4' where month > 9;"

# Get mapping yearMonth to [quarter]
selectDistinct $MYDB 1 month2quarter mydays2 yearMonth quarter

# Get mapping weekNum to [yearMonth, quarter]
equalJoin $MYDB 1 week2month_and_quarter myweeks month2quarter yearMonth

# Put mappings into Scope_week
sqlite3 $MYDB "create table Scope_week2 as \
    select a.weekNum, b.yearMonth, b.quarter, b.year \
    from Scope_week a, week2month_and_quarter b \
    where a.weekNum = b.weekNum \
    order by a.weekNum;"
sqlite3 $MYDB "delete from Scope_week2 where weekNum >= 260;"

# Put mappings into Scope_day
sqlite3 $MYDB "create table Scope_day2 as \
    select a.dayNum, a.weekNum, a.yearMonth, b.quarter, a.year, a.dayOfWeek \
    from Scope_day a, month2quarter b \
    where a.yearMonth = b.yearMonth \
    order by a.dayNum;"

# Clean up
dropTables $MYDB month2quarter week2month_and_quarter mydays2


##### Add oldRegionId to geography tables
# Get matching DEP --> oldRegionId
importCSVFile $MYDB $INPUT_FOLDER/depts2012.csv DEP2oldRegion

# Add oldRegionId to all tables
sqlite3 $MYDB "create table Scope_CantonId2 as \
    select a.*, b.RegionId as oldRegionId \
    from Scope_CantonId a, DEP2oldRegion b \
    where a.DEP = b.DEP;"

# Clean up
dropTables $MYDB DEP2oldRegion

########## Location-specific ##########
for AGG_GEO_LEVEL in DEP RegionId oldRegionId; do
    echo "$AGG_GEO_LEVEL"
    MYFOLDER=$OUTPUT_FOLDER/timeLocationAggregations/location/CantonId
    mkdir -p $MYFOLDER
    selectColumns $MYDB 1 tmp1 Scope_CantonId2 CantonId $AGG_GEO_LEVEL
    exportCSVFile $MYDB tmp1 $MYFOLDER/${AGG_GEO_LEVEL}.csv
    dropTables $MYDB tmp1        
done

########## Time-specific ##########
for TIME_LEVEL in day week; do
    for AGG_TIME_LEVEL in week month quarter dayOfWeek; do
        if [ $TIME_LEVEL == $AGG_TIME_LEVEL ]; then continue; fi
        if [ $TIME_LEVEL != "day" ]    && [ $AGG_TIME_LEVEL == 'dayOfWeek' ]; then continue; fi
        if [ $TIME_LEVEL == "day" ]; then TIME_ID=dayNum; fi
        if [ $TIME_LEVEL == "week" ]; then TIME_ID=weekNum; fi
        if [ $TIME_LEVEL == "month" ]; then TIME_ID=yearMonth; fi
        if [ $AGG_TIME_LEVEL == "day" ]; then AGG_TIME_ID=dayNum; fi
        if [ $AGG_TIME_LEVEL == "week" ]; then AGG_TIME_ID=weekNum; fi
        if [ $AGG_TIME_LEVEL == "month" ]; then AGG_TIME_ID=yearMonth; fi
        if [ $AGG_TIME_LEVEL == "quarter" ]; then AGG_TIME_ID=quarter; fi
        if [ $AGG_TIME_LEVEL == "dayOfWeek" ]; then AGG_TIME_ID=dayOfWeek; fi
        
        echo "$TIME_LEVEL - $AGG_TIME_LEVEL - $TIME_ID - $AGG_TIME_ID"
        
         MYFOLDER=$OUTPUT_FOLDER/timeLocationAggregations/time/${TIME_LEVEL}
         mkdir -p $MYFOLDER
         selectColumns $MYDB 1 tmp1 Scope_${TIME_LEVEL}2 $TIME_ID $AGG_TIME_ID
         exportCSVFile $MYDB tmp1 $MYFOLDER/${AGG_TIME_LEVEL}.csv
         dropTables $MYDB tmp1
    done
done

dropTables $MYDB Scope_CantonId2 Scope_day2 Scope_week2

#########################################################################################################
###### MAKE CONTIGUITY #####
CONTIGUITY_FILE=$INPUT_FOLDER/contiguity.csv

importCSVFile $MYDB $CONTIGUITY_FILE contiguity
sqlite3 $MYDB "delete from contiguity where CantonId1 like '97%' or CantonId2 like '97%';"

# Some cantons have no contiguity (islands). Fix this issue manually by setting contiguity with closest canton.
#   # WARNING: If I were to launch this with communes (CodeInsee), I would likely have to fix such issues too.
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('1704', '1703');" # 1704 is ile d'Oleron
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('1705', '1799');" # 1705 is ile de Re
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('1713', '1725');" # Rochefort (weird)
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('2103', '2117');" # Beaune (surrounded by 2117
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('5609', '5612');" # Groix
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('8507', '8516');" # ile d'Yeu

# Include the symmetric as well
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('1703', '1704');" # 1704 is ile d'Oleron
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('1799', '1705');" # 1705 is ile de Re
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('1725', '1713');" # Rochefort (weird)
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('2117', '2103');" # Beaune (surrounded by 2117
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('5612', '5609');" # Groix
sqlite3 $MYDB "insert into contiguity(CantonId1, CantonId2) values('8516', '8507');" # ile d'Yeu

# Output contiguity to csv
sqlite3 $MYDB "create table contiguity2 as select *, 1 as one from contiguity;"
exportCSVFile $MYDB contiguity2 $OUTPUT_FOLDER/SpatialMatrices/Contiguity.csv

# Clean up
dropTables $MYDB contiguity contiguity2


################################### AREAS (SUPERFICIES) ########################################
# Import areas of communes
importCSVFile $MYDB $INPUT_FOLDER/commune_areas.csv com_areas0
sqlite3 $MYDB "create table com_areas as \
    select a.CodeInsee, cast(Area as REAL) as Area, b.* \
    from com_areas0 a, mycommunes b \
    where a.CodeInsee = b.CodeInsee ;"
dropTables $MYDB com_areas0

# Aggregate by geography
sqlite3 $MYDB "create table tmp1 as \
    select CantonId, 1e-6*sum(Area) as Area \
    from com_areas \
    group by CantonId;"

sqlite3 $MYDB "create table Area_CantonId as \
    select a.CantonId, b.Area \
    from Scope_CantonId a left join tmp1 b on a.CantonId = b.CantonId;"

dropTables $MYDB tmp1
dropTables $MYDB com_areas
